library(tidyverse)
library(stringr)

# Set the working directory to the folder that holds the replication data.
# The empty string is a placeholder: fill in the path before running.
setwd("")

# The survey firm, Dynata, delivered the education variable in three separate
# files. Each file is reduced to the same three columns (psid, country,
# dynata_edu) after harmonising its column names and dropping rows whose
# education value is "#N/A" or "0". The variable is only used to compare the
# survey sample with population statistics.
E1 <- read_csv("Data_raw/country_psid_1.csv") %>%
  rename(any_of(c(dynata_edu = "20.09 Status checked"))) %>%
  filter(!dynata_edu %in% c("#N/A", "0")) %>%
  select(psid, country, dynata_edu)

E2 <- read_csv("Data_raw/country_psid_2.csv") %>%
  rename(any_of(c(dynata_edu = "Status", country = "Country"))) %>%
  filter(!dynata_edu %in% c("#N/A", "0")) %>%
  select(psid, country, dynata_edu)

E3 <- read_csv("Data_raw/country_psid_3.csv") %>%
  rename(any_of(c(dynata_edu = "Education"))) %>%
  filter(!dynata_edu %in% c("#N/A", "0")) %>%
  select(psid, country, dynata_edu)

# Stack the files and keep one row per psid. Only the first occurrence of a
# psid is kept, so a respondent present in several files takes the value from
# E3, then E2, then E1.
E <- bind_rows(E3, E2, E1) %>%
  filter(!duplicated(psid))

# Load the raw survey export. The first two rows after the header are dropped
# (presumably extra header/metadata rows in the export -- confirm against the
# raw file).
D <- read_csv("Data_raw/RAW-US+Citizen+Survey_November+4%2C+2022_08.08.csv") %>%
  slice(-(1:2)) %>%
  # Drop rows without a respondent id and the single observation that Dynata
  # themselves created to test the survey
  filter(!is.na(psid), psid != "8oA2Ifs86RLoIZZovY8ODQ**") %>%
  # Merge in the Dynata education variable by psid; respondents without a
  # match in E get NA
  mutate(dynata_edu = E$dynata_edu[match(psid, E$psid)])

# Give the raw question columns descriptive names (new name = raw name) and
# store the completion time as a number. V1_* and V2_* are the answers to the
# first and second vignette; conjoint1-conjoint5 are the five conjoint choices.
D <- D %>%
  rename(all_of(c(
    duration = "Duration (in seconds)",

    V1_incivility = "Q17",
    V2_incivility = "Q77",
    V1_prejudice = "Q19_1",
    V2_prejudice = "Q78_1",
    V1_opinion_diff = "Q19_2",
    V2_opinion_diff = "Q78_2",
    V1_dislike_party = "Q19_3",
    V2_dislike_party = "Q78_3",
    V1_discourage_pol = "Q19_4",
    V2_discourage_pol = "Q78_4",
    V1_dissatisfied = "Q19_5",
    V2_dissatisfied = "Q78_5",
    V1_troll_pol = "Q19_6",
    V2_troll_pol = "Q78_6",
    V1_troll_user = "Q19_7",
    V2_troll_user = "Q78_7",

    conjoint1 = "Q25",
    conjoint2 = "Q47",
    conjoint3 = "Q49",
    conjoint4 = "Q51",
    conjoint5 = "Q53__65",

    resp_ideology = "Q21",
    resp_gender = "Q22",
    resp_party = "Q36__75",
    resp_age = "Q34__71",
    resp_education = "Q35__72",
    resp_region = "Q42",
    resp_exposure = "Q30",
    attention_check = "Q41"
  ))) %>%
  mutate(duration = as.numeric(duration))

# Recode the raw answer labels into analysis variables:
#   * incivility ratings (0-10) are stripped of their end-point labels and
#     converted to numeric;
#   * the concern, government-action and vignette follow-up items are mapped
#     to numeric scales;
#   * respondent characteristics (ideology, birth-year based age group,
#     education, party, exposure to online harassment) are cleaned.
# The helpers live inside local() so they do not leak into the global
# environment.
D <- local({
  # Six-point agreement scale of the vignette follow-up items. The label
  # capitalisation ("Slightly Agree", "Somewhat Agree") is kept exactly as in
  # the recode keys and differs from the govt_action labels further down.
  # NOTE(review): recode() turns labels that match no key into NA -- confirm
  # both spellings against the raw export.
  vignette_scale <- c("Strongly disagree" = 1, "Somewhat disagree" = 2,
                      "Slightly disagree" = 3, "Slightly Agree" = 4,
                      "Somewhat Agree" = 5, "Strongly agree" = 6)
  vignette_items <- c("V1_prejudice", "V2_prejudice",
                      "V1_opinion_diff", "V2_opinion_diff",
                      "V1_dislike_party", "V2_dislike_party",
                      "V1_discourage_pol", "V2_discourage_pol",
                      "V1_dissatisfied", "V2_dissatisfied",
                      "V1_troll_pol", "V2_troll_pol",
                      "V1_troll_user", "V2_troll_user")
  as_agreement <- function(x) recode(x, !!!vignette_scale)

  # 0-10 incivility rating: drop the verbal anchors, then convert to numeric.
  as_rating <- function(x) {
    as.numeric(recode(x, "10 (Extremely disrespectful)" = "10",
                         "0 (Not disrespectful)" = "0"))
  }

  # Frequency labels shared by the two "how often" questions (Q31 and Q37).
  frequency_key <- c("Several times a day" = "Several times a day",
                     "About once a day" = "About once a day",
                     "3 to 6 times a week" = "3 to 6 days a week",
                     "1 to 2 days a week" = "1 to 2 days a week",
                     "Every few weeks" = "Every few weeks",
                     "Less often" = "Less often",
                     "Never" = "Never")

  # Label for one type of harassment: `no` when the exposure answer is
  # missing or does not match the regular expression `pattern`, else `yes`.
  flag_exposure <- function(x, pattern, yes, no) {
    ifelse(is.na(x) | !str_detect(x, pattern), no, yes)
  }

  D %>%
    mutate(
      across(c(V1_incivility, V2_incivility), as_rating),
      concern_disrespect = recode(Q32,
                                  "Not concerned at all" = 1,
                                  "Slightly concerned" = 2,
                                  "Somewhat concerned" = 3,
                                  "Moderately concerned" = 4,
                                  "Extremely concerned" = 5),
      govt_action = recode(Q36__28,
                           "Strongly disagree" = 1,
                           "Somewhat disagree" = 2,
                           "Slightly disagree" = 3,
                           "Slightly agree" = 4,
                           "Somewhat agree" = 5,
                           "Strongly agree" = 6),
      across(all_of(vignette_items), as_agreement),

      # Ideology: strip the end-point labels and convert to numeric (0-10).
      resp_ideology = as.numeric(recode(resp_ideology,
                                        "(Left)\n0" = "0",
                                        "(Right)\n10" = "10")),
      resp_gender = recode(resp_gender, "Female" = "Female", "Male" = "Male",
                                        "Other" = "Other"),

      # resp_age holds a year of birth (it is compared with calendar years).
      # Age groups are ten-year birth bands consistent with
      # age = 2022 - birth year (the export is dated November 2022).
      # Fixes: 1998 previously matched no band (NA), and 1977 was assigned
      # to "35-44" because the bands were 11 and 9 years wide.
      resp_age = as.numeric(resp_age),
      resp_age_group = case_when(resp_age >= 1998 ~ "18-24",
                                 resp_age %in% 1988:1997 ~ "25-34",
                                 resp_age %in% 1978:1987 ~ "35-44",
                                 resp_age %in% 1968:1977 ~ "45-54",
                                 resp_age %in% 1958:1967 ~ "55-64",
                                 resp_age < 1958 ~ "65+"),
      resp_education = recode(
        resp_education,
        "No formal education / primary school" = "Below high school",
        "Secondary school" = "Secondary school",
        "Higher non-university" = "Higher non-university",
        "Higher university" = "University degree"
      ),

      # Exposure to social media harassment.
      # NOTE(review): any non-missing answer counts as exposure here; confirm
      # that the question offered no explicit "none" option.
      resp_exposure_binary = ifelse(
        !is.na(resp_exposure),
        "Experienced social media harassment",
        "Has not experienced social media harassment"
      ),
      resp_exposure_sexist_remarks = flag_exposure(
        resp_exposure, "Sexist remarks",
        yes = "Sexist remarks", no = "No sexist remarks"
      ),
      resp_exposure_disrespectful_remarks = flag_exposure(
        resp_exposure, "Highly disrespectful comments",
        yes = "Disrespectful remarks", no = "No disrespectful remarks"
      ),
      resp_exposure_sexualized_images = flag_exposure(
        resp_exposure,
        "Humiliating or sexually charged images of yourself spread through social media",
        yes = "Humiliating/sexually charged images of oneself",
        no = "No humiliating/sexually charged images of oneself"
      ),
      resp_exposure_violent_threats = flag_exposure(
        resp_exposure, "Threats of violence",
        yes = "Violent threats", no = "No violent threats"
      ),
      resp_exposure_persistent_harassment = flag_exposure(
        resp_exposure,
        "Harassment \\(exposure to insistent and uninvited behaviour\\)",
        yes = "Persistent harassment", no = "No persistent harassment"
      ),

      resp_personal_disrespect = recode(Q31, !!!frequency_key),
      resp_other_disrespect = recode(Q37, !!!frequency_key),
      resp_party = recode(
        resp_party,
        "Other (please specify which party):" = "Other",
        "I do not consider myself close to a particular party" = "Does not identify",
        "Democratic Party" = "Democratic Party",
        "Republican Party" = "Republican Party"
      ),

      # Three-way ideology split of the numeric 0-10 scale.
      resp_ideology_discrete = case_when(resp_ideology %in% 0:4 ~ "Left-wing",
                                         resp_ideology %in% 5 ~ "Center",
                                         resp_ideology %in% 6:10 ~ "Right-wing")
    )
})

# Share of respondents who gave the correct attention-check answer
# (missing answers are left out of the calculation)
passed_attention <- with(
  D,
  mean(attention_check == "I have a question", na.rm = TRUE)
)
passed_attention

# Keep respondents who passed the check, one row per psid. Repeated psids are
# flagged on the unfiltered data, so only the first row of a psid can be kept.
D <- D %>%
  filter(attention_check == "I have a question", !duplicated(psid))


# CLEAN THE SINGLE VIGNETTE DATA

# Long format with one row per respondent and vignette task: the first
# nrow(D) rows hold task 1 (V1_* / `V-1-1-*` columns), the next nrow(D) rows
# hold task 2 (V2_* / `V-2-1-*` columns). Respondent-level variables are
# repeated for both tasks.
V <- local({
  n_resp <- nrow(D)

  # Respondent-level value, repeated once per task
  both_tasks <- function(x) rep(x, 2)
  # Value recorded on the task 1 rows only; the task 2 rows are NA
  first_task_only <- function(x) c(x, rep(NA, length(x)))
  # Randomised vignette attribute number k, task 1 followed by task 2
  vignette_attr <- function(k) {
    c(D[[paste0("V-1-1-", k)]], D[[paste0("V-2-1-", k)]])
  }
  # Answers to a vignette item, task 1 followed by task 2
  vignette_item <- function(item) {
    c(D[[paste0("V1_", item)]], D[[paste0("V2_", item)]])
  }

  tibble(
    country = "United States",
    respondent_type = "Citizen",
    respondent_type_three = "Citizen",
    id = both_tasks(paste("US citizen", 1:n_resp)),
    dynata_edu = both_tasks(D$dynata_edu),
    resp_gender = both_tasks(D$resp_gender),
    resp_age_group = both_tasks(D$resp_age_group),
    resp_education = both_tasks(D$resp_education),
    resp_region = both_tasks(D$resp_region),
    resp_party = both_tasks(D$resp_party),
    resp_ideology = both_tasks(D$resp_ideology),
    resp_ideology_discrete = both_tasks(D$resp_ideology_discrete),
    resp_exposure_binary = both_tasks(D$resp_exposure_binary),
    resp_exposure_sexist_remarks = both_tasks(D$resp_exposure_sexist_remarks),
    resp_exposure_disrespectful_remarks =
      both_tasks(D$resp_exposure_disrespectful_remarks),
    resp_exposure_sexualized_images =
      both_tasks(D$resp_exposure_sexualized_images),
    resp_exposure_violent_threats =
      both_tasks(D$resp_exposure_violent_threats),
    resp_exposure_persistent_harassment =
      both_tasks(D$resp_exposure_persistent_harassment),
    resp_personal_disrespect = both_tasks(D$resp_personal_disrespect),
    resp_other_disrespect = both_tasks(D$resp_other_disrespect),
    concern_disrespect = first_task_only(D$concern_disrespect),
    govt_action = first_task_only(D$govt_action),

    incivility = vignette_item("incivility"),
    task_number = rep(paste0("Task ", 1:2), each = n_resp),

    party_pol = vignette_attr(1),
    woman_pol = vignette_attr(2),
    text_pol = vignette_attr(3),
    woman_user = vignette_attr(4),
    text_user = vignette_attr(5),
    gendered = vignette_attr(6),

    prejudice = vignette_item("prejudice"),
    opinion_diff = vignette_item("opinion_diff"),
    dislike_party = vignette_item("dislike_party"),
    discourage_pol = vignette_item("discourage_pol"),
    dissatisfied = vignette_item("dissatisfied"),
    troll_pol = vignette_item("troll_pol"),
    troll_user = vignette_item("troll_user")
  )
})

# Translate the attribute codes of the vignettes into labels. The politician
# code (Pol01-Pol18) carries two attributes: it is first mapped to the
# politician's race (poc_pol) and then overwritten with the politician's
# gender (woman_pol). text_pol and text_user keep their raw codes; text_pol is
# additionally grouped by policy topic (text_pol_group).
V <- local({
  pol_codes <- sprintf("Pol%02d", 1:18)
  poc_codes <- sprintf("Pol%02d", c(3, 4, 8, 12, 13, 17, 18))

  # Pol03, 04, 08, 12, 13, 17, 18 are politicians of color; the rest are white
  poc_key <- setNames(
    ifelse(pol_codes %in% poc_codes,
           "Person of Color politician", "White politician"),
    pol_codes
  )
  # Pol01-Pol09 are women, Pol10-Pol18 are men
  gender_pol_key <- setNames(
    rep(c("Woman politician", "Man politician"), each = 9),
    pol_codes
  )
  # PolText01-PolText20: five topics with four texts each
  topic_key <- setNames(
    rep(c("Economy", "Healthcare", "Education", "Crime", "National security"),
        each = 4),
    sprintf("PolText%02d", 1:20)
  )
  # User01-User09 are women, User10-User18 are men
  gender_user_key <- setNames(
    rep(c("Woman user", "Man user"), each = 9),
    sprintf("User%02d", 1:18)
  )

  V %>%
    mutate(
      party_pol = recode(party_pol, "Party01" = "Democratic Party",
                                    "Party02" = "Republican Party"),
      poc_pol = recode(woman_pol, !!!poc_key),
      woman_pol = recode(woman_pol, !!!gender_pol_key),
      text_pol_group = recode(text_pol, !!!topic_key),
      woman_user = recode(woman_user, !!!gender_user_key),
      gendered = recode(gendered, "Gendered01" = "Non-gendered text",
                                  "Gendered02" = "Gendered text")
    )
})

# Flag vignettes whose politician belongs to the respondent's own party.
# Only Democratic/Republican matches count; every other row, including rows
# with a missing party, is "Non co-partisan". Rows are then sorted by
# respondent id and the vignette data is written to disk.
V <- V %>%
  mutate(
    party_pol_copartisan = if_else(
      (party_pol %in% "Democratic Party" & resp_party %in% "Democratic Party") |
        (party_pol %in% "Republican Party" & resp_party %in% "Republican Party"),
      "Co-partisan",
      "Non co-partisan"
    )
  ) %>%
  arrange(id)

write_csv(V, "Data_Cleaned/US_Citizen_Vignette.csv")


# CLEAN THE PAIRED CONJOINT DATA

# Long format with one row per respondent, task and profile (5 tasks x 2
# profiles = 10 rows per respondent). The data are stacked in ten blocks of
# nrow(D) rows, in the order task 1 profile A, task 1 profile B, task 2
# profile A, ..., task 5 profile B. The attribute columns are named
# `F-<task>-<profile>-<attribute>`.
C <- tibble(
  country = "United States",
  respondent_type = "Citizen",
  respondent_type_three = "Citizen",
  id = rep(paste("US citizen", 1:nrow(D)), 10),
  duration = rep(D$duration, 10),
  resp_gender = rep(D$resp_gender, 10),
  resp_age_group = rep(D$resp_age_group, 10),
  resp_education = rep(D$resp_education, 10),
  resp_region = rep(D$resp_region, 10),
  resp_party = rep(D$resp_party, 10),
  resp_ideology = rep(D$resp_ideology, 10),
  resp_ideology_discrete = rep(D$resp_ideology_discrete, 10),
  resp_exposure_binary = rep(D$resp_exposure_binary, 10),

  # Each task spans two consecutive blocks of nrow(D) rows (profile A, then
  # profile B). The previous row-interleaved pattern
  # rep(rep(1:5, each = 2), nrow(D)) did not follow this block layout, so the
  # task labels were misaligned with the choices and attributes below.
  task_number = rep(paste0("Task ", 1:5), each = 2 * nrow(D)),

  # 1 if the profile in this row was the one chosen in the task, 0 otherwise
  chose_profile = c(as.numeric(D$conjoint1 == "Conversation A"),
                    as.numeric(D$conjoint1 == "Conversation B"),
                    as.numeric(D$conjoint2 == "Conversation A"),
                    as.numeric(D$conjoint2 == "Conversation B"),
                    as.numeric(D$conjoint3 == "Conversation A"),
                    as.numeric(D$conjoint3 == "Conversation B"),
                    as.numeric(D$conjoint4 == "Conversation A"),
                    as.numeric(D$conjoint4 == "Conversation B"),
                    as.numeric(D$conjoint5 == "Conversation A"),
                    as.numeric(D$conjoint5 == "Conversation B")),

  party_pol = c(D$`F-1-1-1`, D$`F-1-2-1`, D$`F-2-1-1`, D$`F-2-2-1`,
                D$`F-3-1-1`, D$`F-3-2-1`, D$`F-4-1-1`, D$`F-4-2-1`,
                D$`F-5-1-1`, D$`F-5-2-1`),
  woman_pol = c(D$`F-1-1-2`, D$`F-1-2-2`, D$`F-2-1-2`, D$`F-2-2-2`,
                D$`F-3-1-2`, D$`F-3-2-2`, D$`F-4-1-2`, D$`F-4-2-2`,
                D$`F-5-1-2`, D$`F-5-2-2`),
  text_pol = c(D$`F-1-1-3`, D$`F-1-2-3`, D$`F-2-1-3`, D$`F-2-2-3`,
               D$`F-3-1-3`, D$`F-3-2-3`, D$`F-4-1-3`, D$`F-4-2-3`,
               D$`F-5-1-3`, D$`F-5-2-3`),
  woman_user = c(D$`F-1-1-4`, D$`F-1-2-4`, D$`F-2-1-4`, D$`F-2-2-4`,
                 D$`F-3-1-4`, D$`F-3-2-4`, D$`F-4-1-4`, D$`F-4-2-4`,
                 D$`F-5-1-4`, D$`F-5-2-4`),
  text_user = c(D$`F-1-1-5`, D$`F-1-2-5`, D$`F-2-1-5`, D$`F-2-2-5`,
                D$`F-3-1-5`, D$`F-3-2-5`, D$`F-4-1-5`, D$`F-4-2-5`,
                D$`F-5-1-5`, D$`F-5-2-5`),
  gendered = c(D$`F-1-1-6`, D$`F-1-2-6`, D$`F-2-1-6`, D$`F-2-2-6`,
               D$`F-3-1-6`, D$`F-3-2-6`, D$`F-4-1-6`, D$`F-4-2-6`,
               D$`F-5-1-6`, D$`F-5-2-6`)
)

# Translate the attribute codes of the conjoint profiles into labels. The
# politician code (Pol01-Pol18) carries two attributes: it is first mapped to
# the politician's race (poc_pol) and then overwritten with the politician's
# gender (woman_pol). text_pol and text_user keep their raw codes; text_pol is
# additionally grouped by policy topic (text_pol_group).
C <- local({
  pol_codes <- sprintf("Pol%02d", 1:18)
  poc_codes <- sprintf("Pol%02d", c(3, 4, 8, 12, 13, 17, 18))

  # Pol03, 04, 08, 12, 13, 17, 18 are politicians of color; the rest are white
  poc_key <- setNames(
    ifelse(pol_codes %in% poc_codes,
           "Person of Color politician", "White politician"),
    pol_codes
  )
  # Pol01-Pol09 are women, Pol10-Pol18 are men
  gender_pol_key <- setNames(
    rep(c("Woman politician", "Man politician"), each = 9),
    pol_codes
  )
  # PolText01-PolText20: five topics with four texts each
  topic_key <- setNames(
    rep(c("Economy", "Healthcare", "Education", "Crime", "National security"),
        each = 4),
    sprintf("PolText%02d", 1:20)
  )
  # User01-User09 are women, User10-User18 are men
  gender_user_key <- setNames(
    rep(c("Woman user", "Man user"), each = 9),
    sprintf("User%02d", 1:18)
  )

  C %>%
    mutate(
      party_pol = recode(party_pol, "Party01" = "Democratic Party",
                                    "Party02" = "Republican Party"),
      poc_pol = recode(woman_pol, !!!poc_key),
      woman_pol = recode(woman_pol, !!!gender_pol_key),
      text_pol_group = recode(text_pol, !!!topic_key),
      woman_user = recode(woman_user, !!!gender_user_key),
      gendered = recode(gendered, "Gendered01" = "Non-gendered text",
                                  "Gendered02" = "Gendered text")
    )
})

# Flag profiles whose politician belongs to the respondent's own party.
# Only Democratic/Republican matches count; every other row, including rows
# with a missing party, is "Non co-partisan". Rows are then sorted by
# respondent id and the conjoint data is written to disk.
C <- C %>%
  mutate(
    party_pol_copartisan = if_else(
      (party_pol %in% "Democratic Party" & resp_party %in% "Democratic Party") |
        (party_pol %in% "Republican Party" & resp_party %in% "Republican Party"),
      "Co-partisan",
      "Non co-partisan"
    )
  ) %>%
  arrange(id)

write_csv(C, "Data_Cleaned/US_Citizen_Conjoint.csv")


